Developer CD Series 2000 August: Tool Chest

home *** CD-ROM | disk | FTP | other *** search

/ Developer CD Series 2000 August: Tool Chest / Dev.CD Aug 00 TC Disk 1.toast / pc / sample code / devices and hardware / velocity engine / veleng wavelet / source / swavelet_int.c < prev next >

Wrap

C/C++ Source or Header | 2000-06-23 | 19.5 KB | 771 lines

/* File: SWavelet_int.c Contains: Scalar integer implementation Daubechies D4 wavelet code Copyright: Copyright © 2000 by Apple Computer, Inc., All Rights Reserved. Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Computer, Inc. ("Apple") in consideration of your agreement to the following terms, and your use, installation, modification or redistribution of this Apple software constitutes acceptance of these terms. If you do not agree with these terms, please do not use, install, modify or redistribute this Apple software. In consideration of your agreement to abide by the following terms, and subject to these terms, Apple grants you a personal, non-exclusive license, under Apple’s copyrights in this original Apple software (the "Apple Software"), to use, reproduce, modify and redistribute the Apple Software, with or without modifications, in source and/or binary forms; provided that if you redistribute the Apple Software in its entirety and without modifications, you must retain this notice and the following text and disclaimers in all such redistributions of the Apple Software. Neither the name, trademarks, service marks or logos of Apple Computer, Inc. may be used to endorse or promote products derived from the Apple Software without specific prior written permission from Apple. Except as expressly stated in this notice, no other rights or licenses, express or implied, are granted by Apple herein, including but not limited to any patent rights that may be infringed by your derivative works or by other works in which the Apple Software may be incorporated. The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS. IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef __MWERKS__ #include <altivec.h> #endif #include <MacTypes.h> #include <MacMemory.h> #include "sWavelet_int.h" #define DO_HORIZ 1 #define DO_VERT 1 #define INT_WAVELET_DO_ENDS 1 #define INT_SHIFT_AMOUNT 0 #define H0 (11 * (1<< INT_SHIFT_AMOUNT)) #define H1 (19 * (1<< INT_SHIFT_AMOUNT)) #define H2 (5 * (1<< INT_SHIFT_AMOUNT)) #define H3 (-3 * (1<< INT_SHIFT_AMOUNT)) #define INT_WAVELET_SHIFT_AMOUNT 4 #define INVERSE_WAVELET_SHIFT_AMOUNT 5 #define INT_WAVELET_DIVIDE_AMOUNT (1 << INT_WAVELET_SHIFT_AMOUNT) #define INVERSE_WAVELET_DIVIDE_AMOUNT (1 << INVERSE_WAVELET_SHIFT_AMOUNT) #define HALF_SHIFT_ADD (1 << (INT_WAVELET_SHIFT_AMOUNT-1)) static void ExpandQuad8To16( unsigned char *pSrc, unsigned short *pDst, unsigned long length) { int vectorInIndex; for (vectorInIndex = 0; vectorInIndex < length; vectorInIndex++) { *pDst++ = *pSrc++; } } static void Pack16To8( signed short *pSrc, unsigned char *pDest, unsigned long length) { signed short element; int i; for (i = 0; i<length; i++) { element = *pSrc++; if (element < 0) element = 0; if (element > 0xff) element = 0xff; *pDest++ = element; } } void PadXY_32Bit(unsigned long *pSrc, Handle h2, unsigned long rowBytes, long x, long y, long padX, long padY) { unsigned long *pInQuad; unsigned long *pOutQuad; unsigned long outQuads = padX*padY; unsigned long inQuads = x*y; int i,j; ::SetHandleSize(h2, padX*padY*4); ThrowIfMemError_(); pInQuad = pSrc; pOutQuad = (unsigned long*)(*h2); for (i=0; i<y; i++) { pInQuad = pSrc + ((rowBytes/4)*i); for (j=0; j<x; j++) { *pOutQuad++ = *pInQuad++; } for (; j<padX; j++) { *pOutQuad++ = 0; } } for (;i<padY; i++) { for (j=0; j<padX; j++) { *pOutQuad++ = 0; } } } #pragma mark - void sFWVT_4_Quad16_Int_Vertical(short *pSrc, short *pDest, unsigned long numQuads, unsigned long skipQuadCount, unsigned long columns) { long vectorInIndex; long columnIndex; long length = (numQuads/2); signed short *pSrcInput; signed short *pLoOutput; signed short *pHiOutput; signed short A_First; signed short A_Second; signed short element0; signed short element1; signed short element2; signed short element3; signed long result0; signed long result1; for (columnIndex = 0; columnIndex < columns*4; columnIndex++) { pSrcInput = pSrc + (columnIndex); pLoOutput = pDest+ (columnIndex); pHiOutput = pLoOutput + ((skipQuadCount*4)*(numQuads/2)); A_First = element0 = *pSrcInput; pSrcInput += (skipQuadCount*4); A_Second = element1 = *pSrcInput; pSrcInput += (skipQuadCount*4); element2 = *pSrcInput; pSrcInput += (skipQuadCount*4); element3 = *pSrcInput; pSrcInput += (skipQuadCount*4); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput = result0; *pHiOutput = result1; pLoOutput += (skipQuadCount*4); pHiOutput += (skipQuadCount*4); element0 = element2; element1 = element3; element2 = *pSrcInput; pSrcInput += (skipQuadCount*4); element3 = *pSrcInput; pSrcInput += (skipQuadCount*4); for (vectorInIndex = 0; vectorInIndex < length-2; vectorInIndex++) { result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput = result0; *pHiOutput = result1; pLoOutput += (skipQuadCount*4); pHiOutput += (skipQuadCount*4); element0 = element2; element1 = element3; element2 = *pSrcInput; pSrcInput += (skipQuadCount*4); element3 = *pSrcInput; pSrcInput += (skipQuadCount*4); } element2 = A_First; element3 = A_Second; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput = result0; *pHiOutput = result1; } } void sFWVT_4_Quad16_Int( signed short *pSrc, signed short *pDest, unsigned long numQuads, unsigned long skipQuads, unsigned long numRows) { long vectorInIndex; long elementIndex; long rowIndex; signed short *pSrcInput; signed short *pLoOutput; signed short *pHiOutput; signed short First_A; signed short First_R; signed short First_G; signed short First_B; signed short Second_A; signed short Second_R; signed short Second_G; signed short Second_B; signed short element0; signed short element1; signed short element2; signed short element3; signed long result0; signed long result1; for (rowIndex = 0; rowIndex < numRows; rowIndex++) { pSrcInput = pSrc+(rowIndex*(skipQuads*4)); pLoOutput = pDest+(rowIndex*(skipQuads*4)); pHiOutput = pLoOutput + (numQuads*2); /////////////////////////////// First_A = element0 = *pSrcInput; Second_A = element1 = *(pSrcInput+4); element2 = *(pSrcInput+8); element3 = *(pSrcInput+12); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// First_R = element0 = *pSrcInput; Second_R = element1 = *(pSrcInput+4); element2 = *(pSrcInput+8); element3 = *(pSrcInput+12); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// First_G = element0 = *pSrcInput; Second_G = element1 = *(pSrcInput+4); element2 = *(pSrcInput+8); element3 = *(pSrcInput+12); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// First_B = element0 = *pSrcInput; Second_B = element1 = *(pSrcInput+4); element2 = *(pSrcInput+8); element3 = *(pSrcInput+12); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput += 5; for (vectorInIndex = 0; vectorInIndex < ((numQuads-4)/2); vectorInIndex++) { for (elementIndex = 0; elementIndex < 4; elementIndex++) { element0 = *pSrcInput; element1 = *(pSrcInput+4); element2 = *(pSrcInput+8); element3 = *(pSrcInput+12); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; } pSrcInput += 4; } /////////////////////////////// element0 = *pSrcInput; element1 = *(pSrcInput+4); element2 = First_A; element3 = Second_A; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// element0 = *pSrcInput; element1 = *(pSrcInput+4); element2 = First_R; element3 = Second_R; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// element0 = *pSrcInput; element1 = *(pSrcInput+4); element2 = First_G; element3 = Second_G; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput++ = result0; *pHiOutput++ = result1; pSrcInput++; /////////////////////////////// element0 = *pSrcInput; element1 = *(pSrcInput+4); element2 = First_B; element3 = Second_B; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H1)+(element2*H2)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)-(element1*H2)+(element2*H1)-(element3*H0); result0 /= INT_WAVELET_DIVIDE_AMOUNT; result1 /= INT_WAVELET_DIVIDE_AMOUNT; *pLoOutput = result0; *pHiOutput = result1; } } void sIFWVT_4_Quad16_Int_Vertical( signed short *pSrc, signed short *pDest, unsigned long numQuads, unsigned long skipQuadCount, unsigned long columns) { long vectorInIndex; long columnIndex; long length = (numQuads/2); long skipShorts = skipQuadCount * 4; signed short *pSrcLo; signed short *pSrcHi; signed short *pColumnOut; signed short element0; signed short element1; signed short element2; signed short element3; signed long result0; signed long result1; for (columnIndex = 0; columnIndex < columns*4; columnIndex++) { // set up pointers: pColumnOut = pDest + columnIndex; pSrcLo = pSrc + columnIndex; pSrcHi = pSrcLo + ((skipShorts)*(numQuads/2)); element0 = *pSrcLo; element1 = *(pSrcHi - skipShorts); element2 = *pSrcHi; element3 = *(pSrcLo+((numQuads-1)*(skipShorts))); pSrcHi +=skipShorts; pSrcLo +=skipShorts; result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H2)+(element2*H3)+(element3*H1); result1 = (HALF_SHIFT_ADD) + (element0*H1)+(element1*H3)-(element2*H2)-(element3*H0); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pColumnOut = result0; pColumnOut += skipShorts; *pColumnOut = result1; pColumnOut += skipShorts; for (vectorInIndex = 0; vectorInIndex < length-1; vectorInIndex++) { element1 = *pSrcLo; element3 = *pSrcHi; pSrcHi +=skipShorts; pSrcLo +=skipShorts; result0 = (HALF_SHIFT_ADD) + (element0*H2)+(element1*H0)+(element2*H1)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)+(element1*H1)-(element2*H0)-(element3*H2); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pColumnOut = result0; pColumnOut += skipShorts; *pColumnOut = result1; pColumnOut += skipShorts; element0 = element1; element2 = element3; } } } void sIFWVT_4_Quad16_Int( signed short *pSrc, signed short *pDest, unsigned long numQuads, unsigned long skipQuads, unsigned long numRows) { long length = (numQuads/2); long vectorIndex; long rowIndex; long skipShorts = skipQuads*4; long elementIndex; signed short *pRowDest; signed short *pSrcHi, *pSrcLo; signed short element0; signed short element1; signed short element2; signed short element3; signed long result0; signed long result1; for (rowIndex = 0; rowIndex < numRows; rowIndex++) { // set up pointers: pSrcLo = pSrc + (rowIndex*skipShorts); pSrcHi = pSrcLo + (numQuads/2)*4; pRowDest = pDest + (rowIndex*skipShorts); //////////////////////////////////// element0 = *pSrcLo; element1 = *(pSrcHi - 4); element2 = *pSrcHi; element3 = *(pSrcLo+((numQuads-1)*4)); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H2)+(element2*H3)+(element3*H1); result1 = (HALF_SHIFT_ADD) + (element0*H1)+(element1*H3)-(element2*H2)-(element3*H0); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pRowDest = result0; *((pRowDest++)+4) = result1; //////////////////////////////////// element0 = *(pSrcLo+1); element1 = *(pSrcHi - 3); element2 = *(pSrcHi+1); element3 = *(pSrcLo+((numQuads-1)*4)+1); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H2)+(element2*H3)+(element3*H1); result1 = (HALF_SHIFT_ADD) + (element0*H1)+(element1*H3)-(element2*H2)-(element3*H0); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pRowDest = result0; *((pRowDest++)+4) = result1; //////////////////////////////////// element0 = *(pSrcLo+2); element1 = *(pSrcHi - 2); element2 = *(pSrcHi+2); element3 = *(pSrcLo+((numQuads-1)*4)+2); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H2)+(element2*H3)+(element3*H1); result1 = (HALF_SHIFT_ADD) + (element0*H1)+(element1*H3)-(element2*H2)-(element3*H0); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pRowDest = result0; *((pRowDest++)+4) = result1; //////////////////////////////////// element0 = *(pSrcLo+3); element1 = *(pSrcHi - 1); element2 = *(pSrcHi+3); element3 = *(pSrcLo+((numQuads-1)*4)+3); result0 = (HALF_SHIFT_ADD) + (element0*H0)+(element1*H2)+(element2*H3)+(element3*H1); result1 = (HALF_SHIFT_ADD) + (element0*H1)+(element1*H3)-(element2*H2)-(element3*H0); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pRowDest = result0; *((pRowDest++)+4) = result1; pRowDest += 4; for (vectorIndex = 0; vectorIndex < (numQuads-2)/2; vectorIndex++) { for (elementIndex = 0; elementIndex < 4; elementIndex++) { element0 = *pSrcLo; element1 = *(pSrcLo + 4); element2 = *pSrcHi; element3 = *(pSrcHi + 4); result0 = (HALF_SHIFT_ADD) + (element0*H2)+(element1*H0)+(element2*H1)+(element3*H3); result1 = (HALF_SHIFT_ADD) + (element0*H3)+(element1*H1)-(element2*H0)-(element3*H2); result0 /= INVERSE_WAVELET_DIVIDE_AMOUNT; result1 /= INVERSE_WAVELET_DIVIDE_AMOUNT; *pRowDest = result0; *((pRowDest++)+4) = result1; pSrcLo++; pSrcHi++; } pRowDest += 4; } } } void sFWVT_4_Quad16_2DInt(signed short *pSrc, signed short *pDst, signed short *pTemp, unsigned long x, unsigned long y, unsigned long rowQuads, unsigned long depth) { unsigned long depthIndex; unsigned long currentDepthHeight; unsigned long currentDepthWidth; currentDepthHeight = y; currentDepthWidth = x; ExpandQuad8To16((unsigned char*)pSrc, (unsigned short*)pDst, y*rowQuads*4); sFWVT_4_Quad16_Int(pDst, pTemp, currentDepthWidth, rowQuads, currentDepthHeight ); sFWVT_4_Quad16_Int_Vertical(pTemp, pDst, currentDepthHeight, rowQuads, currentDepthWidth); for (depthIndex = 1; depthIndex < depth; depthIndex++) { currentDepthHeight >>= 1; currentDepthWidth >>= 1; sFWVT_4_Quad16_Int( pDst, pTemp, currentDepthWidth, rowQuads, currentDepthHeight ); sFWVT_4_Quad16_Int_Vertical(pTemp, pDst, currentDepthHeight, rowQuads, currentDepthWidth); } } void sIFWVT_4_Quad16_2DInt(signed short *pSrc, signed short *pDst, signed short *pTemp, unsigned long x, unsigned long y, unsigned long rowQuads, unsigned long depth) { unsigned long depthIndex; unsigned long currentDepthHeight; unsigned long currentDepthWidth; currentDepthHeight = y >> (depth); currentDepthWidth = x >> (depth); if (depth) { for (depthIndex = 0; depthIndex < depth-1; depthIndex++) { currentDepthHeight <<= 1; currentDepthWidth <<= 1; sIFWVT_4_Quad16_Int_Vertical(pSrc, pTemp, currentDepthHeight, rowQuads, currentDepthWidth); sIFWVT_4_Quad16_Int(pTemp, pSrc, currentDepthWidth, rowQuads, currentDepthHeight ); } currentDepthHeight <<= 1; currentDepthWidth <<= 1; sIFWVT_4_Quad16_Int_Vertical(pSrc, pTemp, currentDepthHeight, rowQuads, currentDepthWidth); sIFWVT_4_Quad16_Int(pTemp, pSrc, currentDepthWidth, rowQuads, currentDepthHeight ); Pack16To8(pSrc, (unsigned char *)pDst, y * rowQuads * 4); } }